{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "450e0eda-6e4e-4968-a839-f16361789012",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from transformers import BertTokenizer, BertForSequenceClassification\n",
    "from transformers import pipeline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4ec3158d-918d-4f3e-bdc2-3c392b93f235",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Load the sequence-classification model from the './finbert-tone' path,\n",
    "# configured with three output labels.\n",
    "finbert = BertForSequenceClassification.from_pretrained(\n",
    "    './finbert-tone',\n",
    "    num_labels=3,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "413b84c9-71f1-4643-9915-c52653467847",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Load the BERT tokenizer from the same './finbert-tone' path used for the model.\n",
    "tokenizer = BertTokenizer.from_pretrained('./finbert-tone')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b53306d4-914c-4c8e-8f01-d38539f837d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bundle the loaded model and tokenizer into a sentiment-analysis pipeline;\n",
    "# `nlp` is the callable used by every classification cell below.\n",
    "nlp = pipeline(\n",
    "    \"sentiment-analysis\",\n",
    "    model=finbert,\n",
    "    tokenizer=tokenizer,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "dc773fba-8c7a-4c79-a3f4-f6b99049fdd3",
   "metadata": {},
   "outputs": [],
   "source": [
    "sentences = [\"there is a shortage of capital, and we need extra financing\",  \n",
    "             \"growth is strong and we have plenty of liquidity\", \n",
    "             \"there are doubts about our finances\", \n",
    "             \"profits are flat\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a99f5720-5219-4ae7-a14b-d9b822c902b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the pipeline over the whole list of example sentences in one call.\n",
    "results = nlp(sentences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7628a1c4-ddcf-499a-ae99-62a2ecdc25c4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'label': 'Negative', 'score': 0.9966173768043518}, {'label': 'Positive', 'score': 1.0}, {'label': 'Negative', 'score': 0.9999710321426392}, {'label': 'Neutral', 'score': 0.9889441728591919}]\n"
     ]
    }
   ],
   "source": [
    "# Print one {'label', 'score'} dict per example sentence.\n",
    "# Label-id mapping: LABEL_0 = neutral, LABEL_1 = positive, LABEL_2 = negative.\n",
    "print(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6f373ba-295a-4031-b82d-0fc197f96525",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e931bc40-df2c-4f4c-bf56-16b2e896ef6c",
   "metadata": {},
   "outputs": [],
   "source": [
    "sents = [\"Fine. And then on Piedmont\",\" think at the time\",\" there was a little bit of disquiet about price\", \"but then someone was telling me they've got really good chewable technology\",\" And I was wondering whether that's the case and then whether there's a holistic benefit some of the other franchises about extending the life by using that technology. Is that something?\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "7aab938b-a832-4298-af56-5f2d49658702",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Fine. And then on Piedmont',\n",
       " ' think at the time',\n",
       " ' there was a little bit of disquiet about price',\n",
       " \"but then someone was telling me they've got really good chewable technology\",\n",
       " \" And I was wondering whether that's the case and then whether there's a holistic benefit some of the other franchises about extending the life by using that technology. Is that something?\"]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the hand-split pieces to check them before classification.\n",
    "sents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "1ba10289-1b8c-4ed5-a894-bf0d4c30ae45",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classify the hand-split pieces; this rebinds `results` from the earlier example run.\n",
    "results = nlp(sents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "4ae2cd5e-9e1c-485a-bf98-fb44e35523f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'label': 'Neutral', 'score': 0.9949359893798828},\n",
       " {'label': 'Neutral', 'score': 0.9989874958992004},\n",
       " {'label': 'Negative', 'score': 0.6592804193496704},\n",
       " {'label': 'Positive', 'score': 0.9999905824661255},\n",
       " {'label': 'Neutral', 'score': 0.9133597016334534}]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the prediction for each piece, in input order.\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d48570f-a298-444c-bbeb-fa0147e6ec0b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a4d8d95d-0fbd-43dc-adde-a78b7a062db4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The full passage as a single string, to be split programmatically in the next cell.\n",
    "strs = \"Fine. And then on Piedmont, I think at the time, there was a little bit of disquiet about price, but then someone was telling me they've got really good chewable technology. And I was wondering whether that's the case and then whether there's a holistic benefit some of the other franchises about extending the life by using that technology. Is that something?\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "2ba7786b-f887-4b06-8011-4ead8bf69966",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the passage into comma-separated clauses.\n",
    "# The delimiter must be the ASCII comma: the text contains no full-width comma (U+FF0C),\n",
    "# so splitting on that character returned the whole passage as a single element.\n",
    "# Each clause is stripped of surrounding whitespace and empty pieces are dropped.\n",
    "sents = [clause.strip() for clause in strs.split(\",\") if clause.strip()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "0c455c98-1c7e-4422-9233-aa31d93d8d29",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[\"Fine. And then on Piedmont, I think at the time, there was a little bit of disquiet about price, but then someone was telling me they've got really good chewable technology. And I was wondering whether that's the case and then whether there's a holistic benefit some of the other franchises about extending the life by using that technology. Is that something?\"]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the pieces produced by the split to verify how the passage was divided.\n",
    "sents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "3f17f1dd-2d5c-4e1b-bfb4-1023d2bb3b42",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'label': 'Neutral', 'score': 0.9414448738098145},\n",
       " {'label': 'Positive', 'score': 0.9890924692153931},\n",
       " {'label': 'Neutral', 'score': 0.950925886631012},\n",
       " {'label': 'Neutral', 'score': 0.9367497563362122}]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Classify each piece of the split passage and display the predictions.\n",
    "nlp(sents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a194bcbc-2739-43c1-89d6-b88d8a454633",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "c0ee1416-b720-433f-8ffd-ec0b744bcb2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imported here because this cell runs before the notebook's separate `import nltk` cell;\n",
    "# without it, a fresh Restart & Run All fails on the next line with NameError.\n",
    "import nltk\n",
    "\n",
    "# Send NLTK's download requests through the HTTP proxy at 127.0.0.1:7897.\n",
    "nltk.set_proxy(\"http://127.0.0.1:7897\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "a4f10047-b19d-4306-97a3-82a6ef763747",
   "metadata": {},
   "outputs": [],
   "source": [
    "import nltk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "343faf74-cdb9-4c56-9868-0a15b4dc4379",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to\n",
      "[nltk_data]     C:\\Users\\666\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]   Unzipping tokenizers\\punkt.zip.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Download NLTK's 'punkt' tokenizer package into the local nltk_data directory.\n",
    "nltk.download('punkt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "82c17549-9cd4-4db6-a1c9-7e422b591e48",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from nltk.tokenize import sent_tokenize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "55702ff8-b5a4-4e14-b3e5-a11e073b6db3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['This is sentence 1.', 'This is sentence 2.', 'And here is sentence 3.']\n"
     ]
    }
   ],
   "source": [
    "# Sanity-check sent_tokenize on a three-sentence toy string.\n",
    "text = \"This is sentence 1. This is sentence 2. And here is sentence 3.\"\n",
    "\n",
    "# Note: this rebinds `sentences`, replacing the example list defined earlier in the notebook.\n",
    "sentences = sent_tokenize(text)\n",
    "print(sentences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "6f0bfa03-58d4-4963-a951-c5afd45764d0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Rebind `text` to the real passage (the same wording as `strs` earlier in the notebook).\n",
    "text = \"Fine. And then on Piedmont, I think at the time, there was a little bit of disquiet about price, but then someone was telling me they've got really good chewable technology. And I was wondering whether that's the case and then whether there's a holistic benefit some of the other franchises about extending the life by using that technology. Is that something?\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "919a5ab2-0baf-45eb-8758-7a4220f50bbf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4\n"
     ]
    }
   ],
   "source": [
    "# Split the passage into sentences and report how many were found.\n",
    "sentences = sent_tokenize(text)\n",
    "print(len(sentences))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "e1545483-845e-4cad-9fcd-d39168c7041b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'label': 'Neutral', 'score': 0.9600534439086914},\n",
       " {'label': 'Positive', 'score': 0.995377779006958},\n",
       " {'label': 'Neutral', 'score': 0.9573672413825989},\n",
       " {'label': 'Neutral', 'score': 0.9367497563362122}]"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Classify each tokenized sentence and display one prediction per sentence.\n",
    "nlp(sentences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fbb6c3c-bf1a-443f-87d4-6d0225bb5ce2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 原始结果列表\n",
    "results_list = [{'label': 'Neutral', 'score': 0.9600534439086914},\n",
    "                {'label': 'Positive', 'score': 0.995377779006958},\n",
    "                {'label': 'Neutral', 'score': 0.9573672413825989},\n",
    "                {'label': 'Neutral', 'score': 0.9367497563362122}]\n",
    "\n",
    "# 转换为 pandas DataFrame\n",
    "df = pd.DataFrame(results_list)\n",
    "\n",
    "# 计算每个标签的平均得分\n",
    "average_scores = df.groupby('label')['score'].mean().to_dict()\n",
    "\n",
    "# 打印结果\n",
    "print(average_scores)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
